Using data about the locations of firehouses and fires occurring in New York City, this assignment aims to explore whether response times to fires differ across the city. Second, we will try to focus on one possible variable that could affect response times – the distance from the firehouse – and see whether we find the (expected) effect.
NYC Open Data has data on all incidents responded to by fire companies and location of all 218 firehouses in NYC. The fire dataset is updated annually, and thus far only data from 2013 to 2018 is contained, and We are going to look at building fires only.
fire <- read_csv("data/building_fires.csv") %>%
filter(IM_INCIDENT_KEY != 59533504) # this fire got wrong geocoding
firehouses <- read_csv("data/FDNY_Firehouse_Listing.csv") %>%
drop_na(c(Longitude, Latitude))
1 entry in fire dataset cannot be correctly geocoded and 5 entries in firehouse dataset contain missing information on the spatial coordinates. They are excluded here.
Below is a leaflet map of the highest severity fires.
fire_highest <- fire %>%
filter(HIGHEST_LEVEL_DESC %in% c("7 - Signal 7-5", "75 - All Hands Working"))
cleanProperty <- function(x) {
return(substr(x, 6, nchar(x)))
}
fire_highest <- fire_highest %>%
rowwise() %>%
mutate(property = cleanProperty(PROPERTY_USE_DESC))
fire_highest$date <- substr(fire_highest$INCIDENT_DATE_TIME, 1, 10)
popupcontent <- paste("When:", fire_highest$date, "<br/>",
"Where:", fire_highest$address, "<br/>",
"What:", fire_highest$property, "<br/>")
leaflet(fire_highest) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -73.9966827, lat = 40.7331564, zoom = 12) %>%
addCircles(lng = ~lon, lat = ~lat,
col="#627B76",
radius = 3,
popup = popupcontent)
Start with the previous map, the map below distinguishs the markers of the fire locations by PROPERTY_USE_DESC, i.e. what kind of property was affected. Because there are too many categories, property is recoded into dwelling, business, restaurant, and other. Information on type of affected property is now contained in the popup information.
pDwelling <- c("419 - 1 or 2 family dwelling",
"439 - Boarding/rooming house, residential hotels",
"962 - Residential street, road or residential driveway",
"460 - Dormitory-type residence, other",
"464 - Barracks, dormitory",
"429 - Multifamily dwelling",
"400 - Residential, other",
"449 - Hotel/motel, commercial",
"899 - Residential or self-storage units",
"459 - Residential board and care")
pBusiness <- c("579 - Motor vehicle or boat sales, services, repair",
"559 - Recreational, hobby, home repair sales, pet store",
"519 - Food and beverage sales, grocery store",
"557 - Personal service, including barber & beauty shops",
"581 - Department or discount store",
"500 - Mercantile, business, other",
"511 - Convenience store",
"539 - Household goods, sales, repairs",
"549 - Specialty shop",
"529 - Textile, wearing apparel sales",
"141 - Athletic/health club",
"162 - Bar or nightclub",
"183 - Movie theater",
"112 - Billiard center, pool hall",
"142 - Clubhouse",
"181 - Live performance theater",
"144 - Casino, gambling clubs",
"143 - Yacht Club",
"121 - Ballroom, gymnasium",
"140 - Clubs, other")
pRestaurant <- c("161 - Restaurant or cafeteria",
"564 - Laundry, dry cleaning",
"162 - Bar or nightclub",
"160 - Eating, drinking places, other")
recodeProperty <- function (x) {
if (x %in% pDwelling) {
return("dwelling")
} else if (x %in% pBusiness) {
return("business")
} else if (x %in% pRestaurant) {
return("restaurant")
} else {
return("other")
}
}
fire_highest <- fire_highest %>%
rowwise() %>%
mutate(propertyCategory = recodeProperty(PROPERTY_USE_DESC))
popupcontent <- paste("When:", fire_highest$date, "<br/>",
"Where:", fire_highest$address, "<br/>",
"What:", fire_highest$property, "<br/>",
"Category:", fire_highest$propertyCategory)
pal = colorFactor(c('#627B76','#88070B','#F6D672','#071088'),
domain = fire_highest$propertyCategory)
color_offsel1 = pal(fire_highest$propertyCategory)
leaflet(fire_highest) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -73.9966827, lat = 40.7331564, zoom = 12) %>%
addCircles(lng = ~lon, lat = ~lat,
col= color_offsel1,
radius = 3,
popup = popupcontent) %>%
addLegend("topleft",
pal = pal,
values = ~fire_highest$propertyCategory,
title = "Category")
Although we are asked to include property type in the popup, I think it looks more concise to use color to indicate the main category (such as dwelling, restaurant, etc.) and use the popup to show detailed property type (such as multiplefamily dwelling, single family dwelling, etc). So I will not include prepority type (i.e. dwelling, restaurant, etc.) in the popup for maps after this point.
Add marker clustering.
leaflet(fire_highest) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -73.9966827, lat = 40.7331564, zoom = 12) %>%
addCircleMarkers(lng = ~lon, lat = ~lat,
color = color_offsel1,
popup = popupcontent,
radius = 3,
clusterOptions = markerClusterOptions()) %>%
addLegend("topleft",
pal = pal,
values = ~fire_highest$propertyCategory,
title = "Category")
This graph contains the locations of the 218 firehouses in New York City. The size of the circle markers is adjuested by severity, more severe incidents have larger circles on the map. Two layers (“Incidents”, “Firehouses”) that allow the user to select which information to show are available.
popupcontent <- paste("When:", fire_highest$date, "<br/>",
"What:", fire_highest$property, "<br/>",
"Where:", fire_highest$address, "<br/>",
"Number of units on scene:", fire_highest$UNITS_ONSCENE, "<br/>",
"Duration:", round(fire_highest$TOTAL_INCIDENT_DURATION/60, digits = 0), "minutes") # convert second to minute
houseIcon <- icons(
iconUrl = "./data/icon_firehouse.png",
iconWidth = 12, iconHeight = 12,
iconAnchorX = 8, iconAnchorY = 8
)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -73.9966827, lat = 40.7331564, zoom = 12) %>%
addMarkers(data = firehouses,
lng = ~Longitude, lat = ~Latitude,
icon = houseIcon,
group = "Firehouses") %>%
addCircleMarkers(data = fire_highest,
lng = ~lon, lat = ~lat,
col = color_offsel1,
radius = ~UNITS_ONSCENE/3,
popup = popupcontent,
weight = 0,
group = "Incidents") %>%
addLegend("topleft",
pal = pal,
values = fire_highest$propertyCategory,
title = "Category",
group = "Incidents",
opacity = 0.3) %>%
addLayersControl(overlayGroups = c("Firehouses", "Incidents"),
options = layersControlOptions(collapsed = T))
Note: Circle size represents fire severity, fire incidents with more unites on scene are represented by a larger circle.
Icon picture is from https://icons8.com/
We now want to investigate whether the distance of the incident from the nearest firehouse varies across the city.
For all incident locations (independent of severity), identify the nearest firehouse and calculate the distance between the firehouse and the incident location. Provide a scatter plot showing the time until the first engine arrived. Visualize the patterns separately for severe and non-severe incidents.
fire <- read_csv("data/building_fires.csv") %>%
filter(IM_INCIDENT_KEY != 59533504) %>%
drop_na(c(lat, lon, ARRIVAL_DATE_TIME, INCIDENT_DATE_TIME, HIGHEST_LEVEL_DESC)) %>%
rename(fire_lat = lat, fire_lon = lon)
firehouses <- read_csv("data/FDNY_Firehouse_Listing.csv") %>%
drop_na(c(Longitude, Latitude)) %>%
rename(firehouse_lat = Latitude, firehouse_lon = Longitude)
coordinates(fire) <- ~fire_lat+fire_lon
coordinates(firehouses) <- ~firehouse_lat+firehouse_lon
fire_sp <- SpatialPoints(fire)
firehouses_sp <- SpatialPoints(firehouses)
fire$nearest_firehouse_dist <- apply(gDistance(firehouses_sp, fire_sp, byid=TRUE), 1, min)
fire$nearest_firehouse_dist <- fire$nearest_firehouse_dist*100000 # convert to meter
fire$nearest_firehouse <- apply(gDistance(firehouses_sp, fire_sp, byid=TRUE), 1, which.min)
# compute wait time
fire$wait <- round(as.duration(mdy_hms(fire$INCIDENT_DATE_TIME) %--% mdy_hms(fire$ARRIVAL_DATE_TIME))/dminutes(1),1) # in minute
# recode severity
recodeSeverity <- function (x) {
if (x %in% c("0 - Initial alarm", "1 - More than initial alarm, less than Signal 7-5")) {
return(0)
} else if (x %in% c("11 - First Alarm")) {
return(1)
} else if (x %in% c("2 - 2nd alarm", "22 - Second Alarm")) {
return(2)
} else if (x %in% c("3 - 3rd alarm", "33 - Third Alarm")) {
return(3)
} else if (x %in% c("4 - 4th alarm", "44 - Fourth Alarm")) {
return(4)
} else if (x %in% c("5 - 5th alarm", "55 - Fifth Alarm")) {
return(5)
} else if (x %in% c("7 - Signal 7-5", "75 - All Hands Working")) {
return(6)
} else {
return(NA)
}
}
fire <- fire %>%
as.data.frame() %>%
rowwise() %>%
mutate(severity = recodeSeverity(HIGHEST_LEVEL_DESC))
fire$severity <- factor(fire$severity, levels = c(0,1,2,3,4,5,6))
fire$hovertext <- paste("<b>",fire$address,"</b><br><br>",
"Severity level:", fire$severity, "<br>",
"Wait time:", round(fire$wait,1), "minutes<br>",
"Distance to firehouse:", round(fire$nearest_firehouse_dist,0), "meters<br>")
plot_ly(data = fire,
x = ~nearest_firehouse_dist, y = ~wait,
type = "scatter", mode = "markers",
color = ~severity,
colors = c("#F1948A","#EC7063","#E74C3C","#CB4335","#B03A2E","#943126","#641E16"),
opacity = 0.5,
hoverinfo = 'text',
text = ~hovertext
) %>%
layout(
yaxis = list(title = "wait time until the first engine arrived (in minute)", range=c(0,95)),
xaxis = list(title = "distance to the nearest firehouse (in meter)"),
legend = list(
orientation = "h", x = 0.5, y = 0.99,
title=list(text="<b> Severity Level: </b>")
),
title = list(
text = "Distance to firehouse and wait time",
font=list(size=20),
x = 0.05, y = 0.99,
xanchor = "left"
)
)
Note: please zoom in to exclude outliers and see the pattern more clear
Because there are some outliers (i.e. some incidents waited a very long time or were very far away from firehouse), the relation between distance to firehouse and wait time was not very clear at the first galance.
However, when zoom in to incidents with wait time less than 20 and distance less then 3000, we can see a weak positive relation - incidents which occured farther away from firehouse tend to wait longer time for rescue.
By clicking items in the legend, we can examine if this pattern holds for incidients of different severity levels and compare severe and non-severe incidents. From the result we can see that this weak positive corelation is generally true for different incidents.
The map below investigate whether the type of property affected (PROPERTY_USE_DESC) or fire severity (HIGHEST_LEVEL_DESC) has anything to do with response time.
fire$date <- substr(fire$INCIDENT_DATE_TIME, 1, 10)
fire <- fire %>%
rowwise() %>%
mutate(propertyIsDwelling = ifelse(PROPERTY_USE_DESC %in% pDwelling, "dwelling", "not dwelling"),
property = cleanProperty(PROPERTY_USE_DESC),
highestSeverity = ifelse(HIGHEST_LEVEL_DESC %in% c("7 - Signal 7-5", "75 - All Hands Working"), "highest severity", "other"))
fire$popupcontent <- paste(
"When:", fire$date, "<br/>",
"Where:", fire$address, "<br/>",
"What:", fire$property, "<br/>",
"Severity level:", fire$severity, "<br/>",
"Wait time:", fire$wait)
pal1 = colorFactor(c('#ffb14e','#6836FF'),
domain = fire$propertyIsDwelling)
color_offsel1 = pal1(fire$propertyIsDwelling)
pal2 = colorFactor(c('#A7003B','#266C1B'),
domain = fire$highestSeverity)
color_offsel2 = pal2(fire$highestSeverity)
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -73.9966827, lat = 40.7331564, zoom = 12) %>%
addCircleMarkers(data = fire,
lng = ~fire_lon, lat = ~fire_lat,
col = color_offsel1,
radius = ~wait,
popup = ~popupcontent,
weight = 0,
group = "Dwelling?") %>%
addCircleMarkers(data = fire,
lng = ~fire_lon, lat = ~fire_lat,
col = color_offsel2,
radius = ~wait,
popup = ~popupcontent,
weight = 0,
group = "Highest severity?") %>%
addLegend("topleft",
pal = pal1,
values = fire$propertyIsDwelling,
title = "Property Category",
group = "Dwelling or not?",
opacity = 0.3) %>%
addLegend("topleft",
pal = pal2,
values = fire$highestSeverity,
title = "Severity Level",
group = "Highest severity or not?",
opacity = 0.3) %>%
addLayersControl(baseGroups = c("Dwelling?", "Highest severity?"),
options = layersControlOptions(collapsed = F))
Below is a faceted choropleth map indicating how response times have developed over the years.
ny_sf <- readOGR(dsn = "data/ny_zipcode", layer = "zt36_d00")
mapByYear <- function(yr){
fire$year <- year(mdy_hms(fire$INCIDENT_DATE_TIME))
fire_year <- fire %>%
drop_na(wait) %>%
group_by(year, ZIP_CODE) %>%
summarise(avg_wait = mean(wait)) %>%
filter(year==yr) # change to a parameter later
shapefile <- ny_sf
shapefile@data$ZCTA <- as.character(shapefile@data$ZCTA)
fire_year$ZIP_CODE <- as.character(fire_year$ZIP_CODE)
combined <- shapefile@data %>%
left_join(fire_year, by = c("ZCTA" = "ZIP_CODE"))
shapefile@data <- combined
bins <- c(0, 1, 3, 5, 10, 20, 30, Inf)
pal <- colorBin("YlOrRd", domain = fire_year$avg_wait, bins = bins, na.color="transparent")
#popupcontent <- paste("zipcode:", as.character(shapefile@data$ZCTA), "<br/>",
# "average wait time:", round(shapefile@data$avg_wait,1), "minutes<br/>")
popupcontent <- sprintf("<strong>zipcode: %s</strong><br/>%s",
shapefile@data$ZCTA,
ifelse(is.na(shapefile@data$avg_wait),"",paste("average wait time:",round(shapefile@data$avg_wait,1),"minutes"))) %>%
lapply(htmltools::HTML)
plt <- leaflet(shapefile) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
setView(lng = -73.9966827, lat = 40.7331564, zoom = 10) %>%
addPolygons(fillColor = ~pal(avg_wait),
stroke = FALSE,
smoothFactor = 0.5,
fillOpacity = 0.7,
label = popupcontent,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)) %>%
addLegend(pal=pal,
values=~avg_wait,
opacity=0.9,
title = paste("Year:", yr, "<br/>",
"Average Wait Time <br/> (in minute)"),
position = "topleft" )
#addTitle(as.character(yr), # Seperately, it works well, but cannot use it in facet fashion
# color = "#6B6868",
# fontSize = "30px",
# topPosition = -2,
# fontFamily = "serif")
return(plt)
}
map_2013 <- mapByYear(2013)
map_2014 <- mapByYear(2014)
map_2015 <- mapByYear(2015)
map_2016 <- mapByYear(2016)
map_2017 <- mapByYear(2017)
map_2018 <- mapByYear(2018)
sync(map_2013, map_2014, map_2015, map_2016, map_2017, map_2018)
From the facet maps we can see that, central Brooklyn tend to have shorter average wait time and some areas in lower Manhattan tend to have longer average wait time. Compare to other years, in 2018, several areas in NYC have longer wait time than they did before.